#!/usr/bin/env bash

# Print a warning about bad jobs hanging indefinitely in the queue.
# Homepage: https://github.com/OleHolmNielsen/Slurm_tools/

# This script may be run regularly from crontab

# Extended regular expression (alternation) of Job Reason Codes
# (see "man squeue") that mark a pending job as "bad".
REASONLIST="JobHeldAdmin|BadConstraints|InvalidAccount|InvalidQOS|JobLaunchFailure|NodeDown|PartitionDown|PartitionInactive|PartitionNodeLimit|PartitionTimeLimit|ReqNodeNotAvail|WaitingForScheduling|JobHoldMaxRequeue"
# Suppress printing of jobs with non job related reasons.
# Set to the empty string to disable this suppression.
DONTPRINT="Reserved for maintenance"

# Configure the columns printed by squeue
export SQUEUE_FORMAT="%.18i %.10P %.8j %.8u %.8a %.10T %.9Q %.10M %.9l %.6D %.6C %m %R"

#######################################
# Print the squeue lines that match one of the reason codes in REASONLIST,
# dropping lines that match DONTPRINT when DONTPRINT is non-empty.
# Globals:   REASONLIST (read), DONTPRINT (read), SQUEUE_FORMAT (read by squeue)
# Arguments: none
# Outputs:   matching squeue lines on stdout
# Returns:   exit status of the last grep in the pipeline
#            (non-zero when nothing is printed)
#######################################
print_bad_jobs() {
	# "grep -E" replaces the obsolescent "egrep": recent GNU grep releases
	# print a warning on stderr for egrep, which turns into cron mail noise.
	# "--" keeps a pattern that starts with "-" from being taken as an option.
	if [[ -z "$DONTPRINT" ]]
	then
		squeue | grep -E -- "$REASONLIST"
	else
		squeue | grep -E -- "$REASONLIST" | grep -E -v -- "$DONTPRINT"
	fi
}

print_bad_jobs
